* ==============================================================================
* Data: experimental data 
* Project: Multiple Switching and Data Quality in the Multiple Price List
*    
*
* Last updated: September 2019
* ==============================================================================

********************************************************************************
*** Data Cleaning and Recoding
********************************************************************************

* Variable Labels
* Attach descriptive labels to the raw variables already in memory.
********************************************************************************
* subject characteristics
label variable female "Female"
label variable id "Subject id"
label variable grade "Year in school"
label variable birthyear "Year of birth"
label variable birthmonth "Month of birth"
label variable birthday "Day of birth"

* experimental design and task responses
label variable order "Order and treatment status indicator"
label variable ls "Choice in lottery selection task"
label variable mpl_raw "Choices in each decision in MPL; 1: risky; 2: safe"

* household composition
label variable childnumber "Number of children in household"
label variable familynumber "Number of family members in household"

* test scores (spelling of "standardized" corrected in the Chinese-exam label)
label variable math "Score on standardized math exam"
label variable chinese "Score on standardized Chinese exam"

* Treatment and task-order indicators, both derived from the order code
********************************************************************************
* nudge: 1 when order is 12 or 22, 0 otherwise (a missing order also gives 0)
generate nudge = inlist(order, 12, 22)

* ls_first: 1 when order is 11 or 12, 0 otherwise (a missing order also gives 0)
generate ls_first = inlist(order, 11, 12)


* Recode mpl_raw: total risky choices, first switch point, multiple switching
********************************************************************************
* work with the sequence of choices as a string of 1s (risky) and 2s (safe)
tostring mpl_raw, replace

* mpl_total: total number of risky choices (count of "1" in the string);
* higher value indicates more risk tolerant
* NOTE(review): noccur() is not a built-in egen function; it appears to come
* from the user-written egenmore package - confirm it is installed
egen mpl_total = noccur(mpl_raw), string("1")

* mpl_fs: number of risky choices before the first switch to the safe choice,
* i.e. the length of the leading run of 1s; higher value indicates more risk
* tolerant. Stays missing when the string starts with neither "1" nor "2".
generate mpl_fs = .
replace mpl_fs = 0 if substr(mpl_raw, 1, 1) == "2"
local ones ""
forvalues k = 1/6 {
    local ones "`ones'1"
    replace mpl_fs = `k' if substr(mpl_raw, 1, `k') == "`ones'"
}

* switch: multiple switching. After the leading run of mpl_fs risky choices,
* everything that follows must be safe; any other continuation is flagged.
* The third argument of substr() is a length, so 6 reads to the end of a
* six-character string.
generate switch = 0
forvalues k = 0/4 {
    local start = `k' + 1
    local nsafe = 6 - `k'
    replace switch = 1 if mpl_fs == `k' & substr(mpl_raw, `start', 6) != substr("222222", 1, `nsafe')
}

* mpl_avg: average of (a) the number of risky choices before the first switch
* to the safe choice (mpl_fs) and (b) the row of the last risky choice, i.e. the
* row just before the final switch to safe, or 6 when the last choice is risky.
* Higher value indicates more risk tolerant.
*
* Examples: "111221" -> (3+6)/2 = 4.5   "121122" -> (1+4)/2 = 2.5
*           "221122" -> (0+4)/2 = 2     "222211" -> (0+6)/2 = 3
*
* The rule is computed for every multiple-switching pattern rather than looked
* up in a hand-typed table, so a pattern absent from the table no longer ends
* up with a missing mpl_avg. All previously listed patterns keep their values.
generate mpl_avg = .

* consistent (single-switch) subjects: both components coincide with mpl_fs
replace mpl_avg = mpl_fs if switch == 0

* row (1-6) of the last risky choice; 0 if no risky choice was made
tempvar last_risky
generate byte `last_risky' = 0
forvalues row = 1/6 {
    replace `last_risky' = `row' if substr(mpl_raw, `row', 1) == "1"
}

* multiple switchers: only well-formed strings of six 1s and 2s are scored
replace mpl_avg = (mpl_fs + `last_risky') / 2 ///
    if switch == 1 & regexm(mpl_raw, "^[12][12][12][12][12][12]$")

drop `last_risky'



* Demographics: date of birth and age
********************************************************************************
* birthday2015 holds the date of birth as a Stata daily date
* (despite its name, it is not a birthday falling in 2015)
generate birthday2015 = mdy(birthmonth, birthday, birthyear)

* age in years as of 1 January 2015, using 365.25 days per year
generate age = (td(01jan2015) - birthday2015) / 365.25


* Test scores: standardize within grade, then combine
********************************************************************************
* math: z-score computed separately within grades 7, 8 and 9
forvalues g = 7/9 {
    egen math_std`g'th = std(math) if grade == `g'
}

* stack the grade-specific z-scores into a single variable
generate math_std = .
forvalues g = 7/9 {
    replace math_std = math_std`g'th if grade == `g'
}


* Chinese (verbal): same within-grade standardization
forvalues g = 7/9 {
    egen verbal_std`g'th = std(chinese) if grade == `g'
}

generate verbal_std = .
forvalues g = 7/9 {
    replace verbal_std = verbal_std`g'th if grade == `g'
}

* raw total score
generate all = math + chinese

* overall score: mean of the two within-grade z-scores
generate all_std = (math_std + verbal_std) / 2

* Median splits on the overall test score
* The medians are captured in temporary scalars immediately after each
* summarize. A macro assigned without "=" would only store the text r(p50),
* to be re-evaluated later against whatever r-class command ran last. A macro
* assigned with "=" stores a formatted string copy of the number, which may
* differ from the stored double in its last digits. Scalars keep the value at
* full precision.
tempname med_all med_control med_treatment

* above_med: 1 if above the full-sample median, 0 otherwise, missing if the
* score is missing
summarize all_std, detail
scalar `med_all' = r(p50)
generate above_med = all_std > `med_all'
replace above_med = . if missing(all_std)

* above_med2: 1 if above the median of the subject's own treatment arm
summarize all_std if nudge == 0, detail
scalar `med_control' = r(p50)
summarize all_std if nudge == 1, detail
scalar `med_treatment' = r(p50)

generate above_med2 = 0
replace above_med2 = 1 if all_std > `med_control' & nudge == 0
replace above_med2 = 1 if all_std > `med_treatment' & nudge == 1
replace above_med2 = . if missing(all_std)


* Grade dummies
* gradedum1, gradedum2, gradedum3 flag grades 7, 8, 9 respectively
* (0 for any other or missing grade)
********************************************************************************
forvalues g = 7/9 {
    local d = `g' - 6
    generate gradedum`d' = (grade == `g')
}


* Variable Labels
* Labels for the socio-economic indicators and the variables built above
********************************************************************************
* age and socio-economic background
label var age       "Age"
label var traveltime "Distance from home to school (=1 if less than or equal to 30min walk)"
label var medu      "Mother's educational attainment (=1 if less than or equal to primary)"
label var mocc      "Mother's occupation (=1 if agricultural)"
label var income    "Monthly hh income (=1 if less than or equal to 750RMB)"
label var allowance "Monthly allowance (=1 if less than or equal to 300RMB)"

* grade indicators
label var gradedum1 "Grade 7 dummy 1"
label var gradedum2 "Grade 8 dummy 2"
label var gradedum3 "Grade 9 dummy 3"

* outcomes and ability measures
label var all_std   "Test score"
label var switch    "Multiple switcher"
label var above_med "High cognitive ability"
